{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Spam Classification Model (Sklearn)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- Wrap a ML model for use as a prediction microservice in seldon-core\n",
    "- Run locally on Docker to test\n",
    "- Deploy on seldon-core running on k8s cluster"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train Locally"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "import pandas as pd\n",
    "from sklearn.externals import joblib\n",
    "from pathlib import Path\n",
    "import string\n",
    "from nltk.stem import SnowballStemmer\n",
    "from nltk.corpus import stopwords\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.model_selection import train_test_split\n",
    "import pickle\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.metrics import accuracy_score\n",
    "model_path: Path=Path('./')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9730861244019139"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.read_csv(\"spam.csv\",encoding='latin-1')\n",
    "data = data.drop([\"Unnamed: 2\", \"Unnamed: 3\", \"Unnamed: 4\"], axis=1)\n",
    "data = data.rename(columns={\"v1\":\"class\", \"v2\":\"text\"})\n",
    "data.head()\n",
    "\n",
    "def pre_process(text):\n",
    "    text = text.translate(str.maketrans('', '', string.punctuation))\n",
    "    text = [word for word in text.split() if word.lower() not in stopwords.words('english')]\n",
    "    words = \"\"\n",
    "    for i in text:\n",
    "            stemmer = SnowballStemmer(\"english\")\n",
    "            words += (stemmer.stem(i))+\" \"\n",
    "    return words\n",
    "\n",
    "features = data['text'].copy()\n",
    "features = features.apply(pre_process)\n",
    "\n",
    "vectorizer = TfidfVectorizer(\"english\")\n",
    "_features = vectorizer.fit_transform(features)\n",
    "with open('skl-spam-classifier/model/vectorizer.pkl', 'wb') as vect:\n",
    "    pickle.dump(vectorizer, vect)\n",
    "    \n",
    "vectorizer = joblib.load(model_path.joinpath('skl-spam-classifier/model/vectorizer.pkl'))\n",
    "train_x, test_x, train_y, test_y = train_test_split(_features, data['class'], test_size=0.3, random_state=0)\n",
    "svc = SVC(kernel='sigmoid', gamma=1.0, probability=True)\n",
    "svc.fit(train_x,train_y)\n",
    "# save the model to disk\n",
    "filename = 'skl-spam-classifier/model/model.pkl'\n",
    "pickle.dump(svc, open(filename, 'wb'))\n",
    "\n",
    "clf = joblib.load(model_path.joinpath(filename))\n",
    "\n",
    "prediction = clf.predict(test_x)\n",
    "accuracy_score(test_y,prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.02762687, 0.97237313]])"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "message = np.array(['click here to win the price'])\n",
    "data = vectorizer.transform(message).todense()\n",
    "probas = clf.predict_proba(data)\n",
    "probas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['ham', 'spam'], dtype=object)"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.classes_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Spam Classification Model (keras)\n",
    "\n",
    "- Wrap a ML model for use as a prediction microservice in seldon-core\n",
    "- Run locally on Docker to test\n",
    "- Deploy on seldon-core running on k8s cluster"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "you can find data here: https://www.kaggle.com/benvozza/spam-classification/data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train Locally"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/compat/v2_compat.py:65: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "non-resource variables are not supported in the long term\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from keras.models import Model\n",
    "from keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding\n",
    "from keras.optimizers import RMSprop\n",
    "from keras.preprocessing.text import Tokenizer\n",
    "from keras.preprocessing import sequence\n",
    "from keras.utils import to_categorical\n",
    "from keras.callbacks import EarlyStopping\n",
    "import pickle\n",
    "from sklearn.externals import joblib\n",
    "import tensorflow.compat.v1 as tf\n",
    "tf.disable_v2_behavior()\n",
    "from keras.engine.saving import model_from_json\n",
    "from keras.layers import (\n",
    "    Bidirectional,\n",
    "    concatenate,\n",
    "    Dense,\n",
    "    Embedding,\n",
    "    LSTM,\n",
    "    Masking,\n",
    "    Reshape,\n",
    "    SpatialDropout1D,\n",
    "    TimeDistributed,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv(\"spam.csv\",encoding='latin-1')\n",
    "\n",
    "data = data.drop([\"Unnamed: 2\", \"Unnamed: 3\", \"Unnamed: 4\"], axis=1)\n",
    "data = data.rename(columns={\"v1\":\"class\", \"v2\":\"text\"})\n",
    "\n",
    "X = data.text\n",
    "Y = data['class']\n",
    "le = LabelEncoder()\n",
    "Y = le.fit_transform(Y)\n",
    "Y = Y.reshape(-1,1)\n",
    "\n",
    "X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.15)\n",
    "max_words = 1000\n",
    "max_len = 150\n",
    "tokenizer = Tokenizer(num_words=max_words)\n",
    "tokenizer.fit_on_texts(X_train)\n",
    "\n",
    "with open('keras-spam-classifier/model/tokenizer.pkl', 'wb') as tok:\n",
    "    pickle.dump(tokenizer, tok)\n",
    "\n",
    "    \n",
    "sequences = tokenizer.texts_to_sequences(X_train)\n",
    "sequences_matrix = sequence.pad_sequences(sequences,maxlen=max_len)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def RNN():\n",
    "    inputs = Input(name='inputs',shape=[max_len])\n",
    "    layer = Embedding(max_words,50,input_length=max_len)(inputs)\n",
    "    layer = LSTM(64)(layer)\n",
    "    layer = Dense(256,name='FC1')(layer)\n",
    "    layer = Activation('relu')(layer)\n",
    "    layer = Dropout(0.5)(layer)\n",
    "    layer = Dense(1,name='out_layer')(layer)\n",
    "    layer = Activation('sigmoid')(layer)\n",
    "    model = Model(inputs=inputs,outputs=layer)\n",
    "    return model\n",
    "\n",
    "model = RNN()\n",
    "model.summary()\n",
    "model.compile(loss='binary_crossentropy',optimizer=RMSprop(),metrics=['accuracy'])\n",
    "\n",
    "model.fit(sequences_matrix,Y_train,batch_size=128,epochs=10,\n",
    "validation_split=0.2,callbacks=[EarlyStopping(monitor='val_loss',min_delta=0.0001)])\n",
    "\n",
    "#save model\n",
    "model_json = model.to_json()\n",
    "with open(\"keras-spam-classifier/model/architecture.json\", \"w\") as json_file:\n",
    "    json_file.write(model_json)\n",
    "model.save_weights(\"keras-spam-classifier/model/weights.h5\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### wrap each model component using s2i"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---> Installing application source...\n",
      "---> Installing dependencies ...\n",
      "Looking in links: /whl\n",
      "Collecting scikit-learn==0.21.2 (from -r requirements.txt (line 1))\n",
      "  Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "Downloading https://files.pythonhosted.org/packages/85/04/49633f490f726da6e454fddc8e938bbb5bfed2001681118d3814c219b723/scikit_learn-0.21.2-cp36-cp36m-manylinux1_x86_64.whl (6.7MB)\n",
      "Requirement already satisfied: numpy>=1.9.2 in /usr/local/lib/python3.6/site-packages (from -r requirements.txt (line 2)) (1.16.3)\n",
      "Collecting keras (from -r requirements.txt (line 3))\n",
      "  Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "Downloading https://files.pythonhosted.org/packages/ad/fd/6bfe87920d7f4fd475acd28500a42482b6b84479832bdc0fe9e589a60ceb/Keras-2.3.1-py2.py3-none-any.whl (377kB)\n",
      "Collecting joblib>=0.11 (from scikit-learn==0.21.2->-r requirements.txt (line 1))\n",
      "  Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "Downloading https://files.pythonhosted.org/packages/8f/42/155696f85f344c066e17af287359c9786b436b1bf86029bb3411283274f3/joblib-0.14.0-py2.py3-none-any.whl (294kB)\n",
      "Collecting scipy>=0.17.0 (from scikit-learn==0.21.2->-r requirements.txt (line 1))\n",
      "  Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "Downloading https://files.pythonhosted.org/packages/29/50/a552a5aff252ae915f522e44642bb49a7b7b31677f9580cfd11bcc869976/scipy-1.3.1-cp36-cp36m-manylinux1_x86_64.whl (25.2MB)\n",
      "Requirement already satisfied: h5py in /usr/local/lib/python3.6/site-packages (from keras->-r requirements.txt (line 3)) (2.9.0)\n",
      "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/site-packages (from keras->-r requirements.txt (line 3)) (1.0.9)\n",
      "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/site-packages (from keras->-r requirements.txt (line 3)) (5.1)\n",
      "Requirement already satisfied: keras-applications>=1.0.6 in /usr/local/lib/python3.6/site-packages (from keras->-r requirements.txt (line 3)) (1.0.7)\n",
      "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.6/site-packages (from keras->-r requirements.txt (line 3)) (1.12.0)\n",
      "Installing collected packages: joblib, scipy, scikit-learn, keras\n",
      "Successfully installed joblib-0.14.0 keras-2.3.1 scikit-learn-0.21.2 scipy-1.3.1\n",
      "Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "You are using pip version 18.1, however version 19.3.1 is available.\n",
      "You should consider upgrading via the 'pip install --upgrade pip' command.\n",
      "Build completed successfully\n"
     ]
    }
   ],
   "source": [
    "!s2i build keras-spam-classifier/ seldonio/seldon-core-s2i-python3:0.7 spam-classifier:1.0.0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "59be46468915231d3915161343486d083d547fde192baa7367ff411efe34c52f\r\n"
     ]
    }
   ],
   "source": [
    "!docker run --name \"spam-classifier\" -d --rm -p 5000:5000 spam-classifier:1.0.0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\"data\":{\"ndarray\":[\"0.9779371008528993\",\"spam\"]},\"meta\":{}}\r\n"
     ]
    }
   ],
   "source": [
    "!curl -g http://localhost:5000/predict --data-urlencode 'json={\"data\": {\"names\": [\"message\"], \"ndarray\": [\"click here to win the price\"]}}'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "spam-classifier\r\n"
     ]
    }
   ],
   "source": [
    "!docker rm spam-classifier --force"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---> Installing application source...\n",
      "---> Installing dependencies ...\n",
      "Looking in links: /whl\n",
      "Collecting scikit-learn==0.21.2 (from -r requirements.txt (line 1))\n",
      "  Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "Downloading https://files.pythonhosted.org/packages/85/04/49633f490f726da6e454fddc8e938bbb5bfed2001681118d3814c219b723/scikit_learn-0.21.2-cp36-cp36m-manylinux1_x86_64.whl (6.7MB)\n",
      "Requirement already satisfied: numpy>=1.9.2 in /usr/local/lib/python3.6/site-packages (from -r requirements.txt (line 2)) (1.16.3)\n",
      "Collecting keras (from -r requirements.txt (line 3))\n",
      "  Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "Downloading https://files.pythonhosted.org/packages/ad/fd/6bfe87920d7f4fd475acd28500a42482b6b84479832bdc0fe9e589a60ceb/Keras-2.3.1-py2.py3-none-any.whl (377kB)\n",
      "Collecting scipy>=0.17.0 (from scikit-learn==0.21.2->-r requirements.txt (line 1))\n",
      "  Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "Downloading https://files.pythonhosted.org/packages/29/50/a552a5aff252ae915f522e44642bb49a7b7b31677f9580cfd11bcc869976/scipy-1.3.1-cp36-cp36m-manylinux1_x86_64.whl (25.2MB)\n",
      "Collecting joblib>=0.11 (from scikit-learn==0.21.2->-r requirements.txt (line 1))\n",
      "  Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "Downloading https://files.pythonhosted.org/packages/8f/42/155696f85f344c066e17af287359c9786b436b1bf86029bb3411283274f3/joblib-0.14.0-py2.py3-none-any.whl (294kB)\n",
      "Requirement already satisfied: h5py in /usr/local/lib/python3.6/site-packages (from keras->-r requirements.txt (line 3)) (2.9.0)\n",
      "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/site-packages (from keras->-r requirements.txt (line 3)) (5.1)\n",
      "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.6/site-packages (from keras->-r requirements.txt (line 3)) (1.12.0)\n",
      "Requirement already satisfied: keras-applications>=1.0.6 in /usr/local/lib/python3.6/site-packages (from keras->-r requirements.txt (line 3)) (1.0.7)\n",
      "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/site-packages (from keras->-r requirements.txt (line 3)) (1.0.9)\n",
      "Installing collected packages: scipy, joblib, scikit-learn, keras\n",
      "Successfully installed joblib-0.14.0 keras-2.3.1 scikit-learn-0.21.2 scipy-1.3.1\n",
      "Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "You are using pip version 18.1, however version 19.3.1 is available.\n",
      "You should consider upgrading via the 'pip install --upgrade pip' command.\n",
      "Build completed successfully\n"
     ]
    }
   ],
   "source": [
    "!s2i build keras-spam-classifier/ seldonio/seldon-core-s2i-python3:0.7 keras-spam-classifier:1.0.0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cf8ebd9bff95cb81ca0cd39393e5c3f2707d7b6394ddc0ace15e41a47248c3e8\r\n"
     ]
    }
   ],
   "source": [
    "!docker run --name \"keras-spam-classifier\" --rm -d -p 5000:5000  keras-spam-classifier:1.0.0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---> Installing application source...\n",
      "---> Installing dependencies ...\n",
      "Looking in links: /whl\n",
      "Collecting goslate (from -r requirements.txt (line 1))\n",
      "  Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "Downloading https://files.pythonhosted.org/packages/39/0b/50af938a1c3d4f4c595b6a22d37af11ebe666246b05a1a97573e8c8944e5/goslate-1.5.1.tar.gz\n",
      "Requirement already satisfied: numpy in /usr/local/lib/python3.6/site-packages (from -r requirements.txt (line 2)) (1.16.3)\n",
      "Collecting futures (from goslate->-r requirements.txt (line 1))\n",
      "  Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "Downloading https://files.pythonhosted.org/packages/05/80/f41cca0ea1ff69bce7e7a7d76182b47bb4e1a494380a532af3e8ee70b9ec/futures-3.1.1-py3-none-any.whl\n",
      "Building wheels for collected packages: goslate\n",
      "Running setup.py bdist_wheel for goslate: started\n",
      "Running setup.py bdist_wheel for goslate: finished with status 'done'\n",
      "Stored in directory: /root/.cache/pip/wheels/4f/7f/28/6f52271012a7649b54b1a7adaae329b4246bbbf9d1e4f6e51a\n",
      "Successfully built goslate\n",
      "Installing collected packages: futures, goslate\n",
      "Successfully installed futures-3.1.1 goslate-1.5.1\n",
      "Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n",
      "You are using pip version 18.1, however version 19.3.1 is available.\n",
      "You should consider upgrading via the 'pip install --upgrade pip' command.\n",
      "Build completed successfully\n"
     ]
    }
   ],
   "source": [
    "!s2i build Translator/ seldonio/seldon-core-s2i-python3:0.7 translator:1.0.0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "26d5f31364f5095680bd89aabae468f0c4cec0f8cc7da5de4ea77434e4836692\r\n"
     ]
    }
   ],
   "source": [
    "!docker run --name \"translator\" -d --rm -p 5000:5000 translator:1.0.0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\"data\":{\"names\":[\"message\"],\"ndarray\":[\"How is your day\"]},\"meta\":{}}\r\n"
     ]
    }
   ],
   "source": [
    "!curl -g http://localhost:5000/transform-input --data-urlencode 'json={\"data\": {\"names\": [\"message\"], \"ndarray\": [\"Wie läuft dein Tag\"]}}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "translator\r\n"
     ]
    }
   ],
   "source": [
    "!docker rm translator --force"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---> Installing application source...\n",
      "Build completed successfully\n"
     ]
    }
   ],
   "source": [
    "!s2i build Combiner/ seldonio/seldon-core-s2i-python3:0.7 combiner:1.0.0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "19fbbdfb073c7da1056611aa18d8c7d3da9a533010667638d8c0cb0abfe6e257\r\n"
     ]
    }
   ],
   "source": [
    "!docker run --name \"model-combiner\" -d --rm -p 5000:5000 combiner:1.0.0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!curl -g http://localhost:5000/aggregate --data-urlencode 'json={\"data\": {\"names\": [\"message\"], \"ndarray\": [[\"0.7\",\"Spam\"], [\"0.80\", \"Spam\"]]}}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "model-combiner\r\n"
     ]
    }
   ],
   "source": [
    "!docker rm model-combiner --force"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Assuming you have kubernetes cluster running and seldon-core installed, you can deploy your Machine Learning model using:"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "kubectl apply -f deploy.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
